#Author: Linda Sommerfeld
#Date: January 2023
#Related manuscript: Visual Context Affects Children’s and Adults’ Cognitive Load Engaged in Predictive Language Processing




################################################################################################################################################################################################
### OVERVIEW ###################################################################################################################################################################################
################################################################################################################################################################################################

### PREPARING 
  # Generate regions of interest
  # Bring data intro long format
  # Remove particular observations
       # Observations for which comprehension question was answered incorrectly
       # Outlier
  # Formatting
       # Round ICA values
       # Bring variables into correct format for plot & analysis
  # Plot data

### ANALYSIS
  # Contrast coding (factors condition & age group)
  # Control model on baseline ica values
  # Contrast coding (factor region of interest)
  # Run generalized linear mixed effect models (GLMERs) 
       # Model with all factors: ica ~ condition * region * age group
       # Model splits by age group & by age group AND region

### DESCREPTIVE INFORMATION 
  # Age, gender, psychometric tests




################################################################################################################################################################################################
### LOAD LIBRARIES, SET WORKING DIRECTION, LOAD DATA ###########################################################################################################################################
################################################################################################################################################################################################

library(lme4)
library(ggplot2)
library(psych)
library(lattice)
library(influence.ME)
library(plyr)
library(dplyr)
library(tidyr)
library(tibble)
library(readr)
library(anytime)
library(languageR)
library(stringr)
library(Rmisc)
library(psych)
library(lmerTest)
library(reshape)
library(ez)
library(MASS)
options(scipen =999)

setwd("")
d <- read.csv2("ica_for publication_preprocessed.csv") 


####################################
### EXPLANATION OF THE DATA FILE ###
####################################

colnames(d)

#group =      age group --> Children & Adults
#subject =    experimental subject ID
#item =       number of item (range = 1 - 32)
#condition =  visual condition --> 0-, 1-, 3-, or 4-consistent

#B1 to P20 =  each of these columns shows a 100ms bin during the trial course, while each column shows the sum of  ica values of both eyes obtained in the respective 100ms bin

#B1 to B20 =  ica values obtained during the preview of the visual scene that lasted for 2000ms
#S1 to S19 =  ica values obtained during the subject of the sentences
#V1 to V15 =  ica values obtained during the verb of the sentences
#G1 to G15 =  ica values obtained during the spill-over word "gleich" of the sentences
#A1 to A8 =   ica values obtained during the article of the sentences
#N1 to N18 =  ica values obtained during the noun of the sentences
#P1 to P29 =  ica values obtained during the postview of the visual scene

#accu =       accuracy (1 = correct, 0 = incorrect) of the respective subject for the comprehension question of the respective item
#age =        age of participant
#gender =     gender of participant (1 = male, 2 = female)
#ppvt =       score in the Peabody Picture Vocabulary Test
#svft =       score in the Semantic Verbal Fluency Task
#cnt =        score in the Color Naming Task




################################################################################################################################################################################################
### PREPARING ##################################################################################################################################################################################
################################################################################################################################################################################################

##################################
### CREATE REGIONS OF INTEREST ### (non-overlapping, 600ms in length) 
##################################

d <- mutate(d, sw = S6+S7+S8+S9+S10+S11) #BASELINE region: Ica values obtained during subejct presentation (e.g., "Vater"). It starts 500ms after subject onset to give participants time to get used to the change from visual to visuo-linguistic input
d <- mutate(d, vw = V2+V3+V4+V5+V6+V7)   #VERB region: Ica values obtained during verb presentation (e.g., "isst"). Starts 100ms after verb onset, as it takes ~ 100ms to recognize a word
d <- mutate(d, gw = G2+G3+G4+G5+G6+G7)   #SPILL-OVER region: Ica values obtained during spill-over word presentation (e.g., "gleich"). Starts 100ms after spill-over word onset, as it takes ~ 100ms to recognize a word
d <- mutate(d, nw = N2+N3+N4+N5+N6+N7)   #Noun region: Ica values obtained during noun presentation (e.g., "Waffel"). Starts 100ms after noun onset, as it takes ~ 100ms to recognize a word



             
###################################
### BRING DATA INTO LONG FORMAT ###
###################################

d$r1 <- d$sw
d$r2 <- d$vw
d$r3 <- d$gw
d$r4 <- d$nw

d <- gather(d, region, ica, r1:r4)

#rename regions for easier handling
d <- mutate(d, region = ifelse(region == "r1", "1",          # 1 = baseline (= subject) 
                        ifelse(region == "r2", "2",          # 2 = verb 
                        ifelse(region == "r3", "3",          # 3 = spill-pver ("gleich")
                                               "4"))))       # 4 = noun
                          

d <- d[,c("group", "subject", "item", "condition", "region", "ica", "age", "ppvt", "svft", "cnt", "accu")]


####################################
### EXPLANATION OF THE DATA FILE ###
####################################

colnames(d)

#group =      age group --> Children & Adults
#subject =    experimental subject ID
#item =       number of item (range = 1 - 32)
#condition =  visual condition --> 0-, 1-, 3-, or 4-consistent
#region =     region of interest --> 1 (= baseline), 2 (= verb), 3 (= spill-over), or 4 (= noun)
#ica =        sum of ica values across all time bins in the respective time region
#accu =       accuracy (1 = correct, 0 = incorrect) of the respective subject for the comprehension question of the respective item
#age =        age of participant
#gender =     gender of participant (1 = male, 2 = female)
#ppvt =       score in the Peabody Picture Vocabulary Test
#svft =       score in the Semantic Verbal Fluency Task
#cnt =        score in the Color Naming Task




##########################################
### REMOVAL OF PARTICULAR OBSERVATIONS ###
##########################################

###
### REMOVE CASES WITH INCORRECT RESPONSES TO COMPREHENSION QUESTION
###

d <- subset(d, accu != "0")


###
### OUTLIER REMOVAL WITH 1.5 * INTERQUARTIL RANGE METHOD
###

#outliers are detected per AGE group, condition, and time region & then replaced with the median of the respective group, condition, and time region 
d <- d %>% 
  group_by(group, condition, region) %>% 
  mutate(ica_clean = ifelse(ica < quantile(ica, probs = .25) - 1.5 * IQR(ica, na.rm = TRUE) | 
                            ica > quantile(ica, probs = .75) + 1.5 * IQR(ica, na.rm = TRUE), median(ica), ica))


###
### ROUND ICA VALUES --> THIS IS REQUIRED FOR GMLERs
###

d$ica_clean <- round(d$ica_clean, digits = 0)


###
### SOME FORMATTING REQUIRED FOR PLOTS & ANALYSIS
###

d$region <- as.factor(d$region)
d$condition <- as.factor(d$condition)




#################
### PLOT DATA ###
################# 

plot <- summarySE(d, measurevar="ica_clean", groupvars=c("condition", "region", "group"), na.rm=T)

labs <- c("Baseline
(father)", "Verb
(eats)", "Spill-over
(soon)", "Noun
(waffle)")

names(labs) <- c("1", "2", "3", "4")

ggplot(plot, aes(condition, ica_clean, fill = condition)) +
  geom_col(position = position_dodge(width = .8), width = .8) +
  facet_grid(group~region, labeller = labeller(region = labs)) +
  geom_errorbar(aes(ymin=ica_clean-se, ymax=ica_clean+se), position = position_dodge(.8), width = .15) + 
  theme_bw()+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank())+ 
  theme(strip.background = element_rect(color="black", fill="white", size=0.5, linetype="solid"))+ 
  theme(strip.text.x = element_text(size = 10, color = "black"))+ 
  theme(strip.text.y = element_text(size = 10, color = "black"))+ 
  xlab("Visual Condition") +
  ylab("Averaged ICA Values")+
  theme(axis.title.x = element_text(size = 10))+
  theme(axis.title.y = element_text(size = 10))+
  theme(axis.text.x = element_text(size =10))+
  theme(axis.text.y = element_text(size =10)) +
  coord_cartesian(ylim=c(25, 40))+
  scale_fill_grey() +
  theme(legend.position = "none")

rm(plot, labs)




################################################################################################################################################################################################
### ANALYSIS ###################################################################################################################################################################################
################################################################################################################################################################################################

#######################################################
### CONTRAST CODING (FACTORS CONDITION & AGE GROUP) ###
#######################################################

#generate three CONDITION contrasts (v1 = 0:1, v2 = 1:3+4, v3 = 3:4)
d <- mutate(d, v1 = ifelse(condition == "3", 0, ifelse(condition == "4", 0, ifelse(condition == "1", 1/2, -1/2))),
               v2 = ifelse(condition == "0", 0, ifelse(condition == "1", 1/3, -2/3)),
               v3 = ifelse(condition == "0", 0, ifelse(condition == "1", 0, ifelse(condition == "3", 1/2, -1/2))))

#generate one AGE GROUP contrast (adults:kids)
d <- mutate(d, a = ifelse(group == "Adults", 1/2, -1/2))




##############################
### BASELINE CONTROL MODEL ###
##############################

#generate subset with cases in baseline region only
s <- subset(d, region == "1")

#run GLMER baseline ica ~ condition * age group
baseline_model <- glmer(ica_clean ~ (v1+v2+v3) * a  +
                        (1 + (v1+v2+v3) || subject)+
                        (1 + (v1+v2+v3) * a   || item),
                      family = poisson(link = "log"),
                      data = s)
summary(baseline_model)
confint(baseline_model, method="Wald")
rm(s, baseline_model)

#as there was no effect of the visual condition in the baseline region, we do not include this region in further analyses and remove it from the data set
d <- subset(d, region != "1") 




###################################################
### CONTRAST CODING (FACTOR REGION OF INTEREST) ###
###################################################

#generate REGION contrasts (t1 = verb:spill-over, t2 = spill-over:noun)
d <- mutate(d, t1 = ifelse(region == "4", 0, ifelse(region == "2", 1/2, -1/2)),
               t2 = ifelse(region == "2", 0, ifelse(region == "3", 1/2, -1/2)))
 



###########################################################
### "MAIN" MODEL WITH THE FACTORS CONDITION & AGE GROUP ###
###########################################################

m <- glmer(ica_clean ~ (v1+v2+v3) * (t1+t2) * a  +
                  (1 + (v1+v2+v3) * (t1+t2)  || subject)+
                  (1 + (v1+v2+v3) * (t1+t2) * a || item),
           family = poisson(link = "log"),
           data = d)
summary(m)
confint(m, method="Wald")
rm(m)




##########################################
### MODEL SPLITS BY AGE GROUP & REGION ###
##########################################

###
### ADULTS
###

#create subsets of data
a  <- subset(d, group == "Adults")  #adults
av <- subset(a, region == "2")      #adults, verb region
as <- subset(a, region == "3")      #adults, spill-over region
an <- subset(a, region == "4")      #adults, noun region


#model for adults with all time regions
adults <- glmer(ica_clean ~ (v1+v2+v3) * (t1+t2)  +
                       (1 + (v1+v2+v3) * (t1+t2)  || subject)+
                       (1 + (v1+v2+v3) * (t1+t2)  || item),
            family = poisson(link = "log"),
            data = a)
summary(adults)
confint(adults, method="Wald")
rm(adults)


#model for adults for verb region
adu_verb <- glmer(ica_clean ~ (v1+v2+v3)   +
                         (1 + (v1+v2+v3) || subject)+
                         (1 + (v1+v2+v3) || item),
             family = poisson(link = "log"),
             data = av)
summary(adu_verb)
confint(adu_verb, method="Wald")
rm(adu_verb)


#model for adults for spill-over region
adu_spill <- glmer(ica_clean ~ (v1+v2+v3)   +
                          (1 + (v1+v2+v3) || subject)+
                          (1 + (v1+v2+v3) || item),
             family = poisson(link = "log"),
             data = as)
summary(adu_spill)
confint(adu_spill, method="Wald")
rm(adu_spill)


#model for adults for noun region
adu_noun <- glmer(ica_clean ~ (v1+v2+v3)   +
                         (1 + (v1+v2+v3) || subject)+
                         (1 + (v1+v2+v3) || item),
             family = poisson(link = "log"),
             data = an)
summary(adu_noun)
confint(adu_noun, method="Wald")
rm(adu_noun, a, av, as, an)




###
### CHILDREN
###

#create subsets of data
k  <- subset(d, group == "Children")   #children
kv <- subset(k, region == "2")         #children, verb region
ks <- subset(k, region == "3")         #children, spill-over region
kn <- subset(k, region == "4")         #children, noun region


#model for children with all time regions
kids <- glmer(ica_clean ~ (v1+v2+v3) * (t1+t2)  + scale(svft) +
              (1 + (v1+v2+v3) * (t1+t2)  || subject)+
              (1 + (v1+v2+v3) * (t1+t2)  || item),
            family = poisson(link = "log"),
            data = k)
summary(kids)
confint(kids, method="Wald")


#model for children for verb region
kdis_verb <- glmer(ica_clean ~ (v1+v2+v3)   +
               (1 + (v1+v2+v3) || subject)+
               (1 + (v1+v2+v3) || item),
             family = poisson(link = "log"),
             data = kv)
summary(kdis_verb)
confint(kids_verb, method="Wald")
rm(kids_verb)


#model for children for spill-over region
kids_spill <- glmer(ica_clean ~ (v1+v2+v3)   +
               (1 + (v1+v2+v3) || subject)+
               (1 + (v1+v2+v3) || item),
             family = poisson(link = "log"),
             data = ks)
summary(kids_spill)
confint(kids_spill, method="Wald")
rm(kids_spill)


#model for children for noun region
kids_noun <- glmer(ica_clean ~ (v1+v2+v3)   +
               (1 + (v1+v2+v3) || subject)+
               (1 + (v1+v2+v3) || item),
             family = poisson(link = "log"),
             data = kn)
summary(kids_noun)
confint(kids_noun, method="Wald")
rm(kids_noun, k, kv, ks, kn)




######################################
### DESCRIPTIVE SAMPLE INFORMATION ###
######################################

#N = 59, n = 23 children, n = 36 adults
count(d, subject)

#change format 
d$age <- as.numeric(d$age)
d$ppvt <- as.numeric(d$ppvt)
d$svft <- as.numeric(d$svft)
d$ppvt <- as.numeric(d$ppvt)

#calculate descriptive values
summarise(group_by(d, group), agemean=mean(age, na.rm = T), agesd=sd(age, na.rm = T), agerange = range(age, na.rm = T))
summarise(group_by(d, group), agemean=mean(cnt, na.rm = T), agesd=sd(cnt, na.rm = T), agerange = range(cnt, na.rm = T))
summarise(group_by(d, group), agemean=mean(ppvt, na.rm = T), agesd=sd(ppvt, na.rm = T), agerange = range(ppvt, na.rm = T))
summarise(group_by(d, group), agemean=mean(svft, na.rm = T), agesd=sd(svft, na.rm = T), agerange = range(svft, na.rm = T))